1 Podsumowanie analizy

TODO

2 Biblioteki

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

3 Wczytanie danych

# Read the battery data set from the CSV file in the working directory.
data <- read.csv(file = "mp_batteries.csv")

# Text attributes are listed explicitly; every remaining column except the
# Battery.ID identifier is treated as a numeric attribute.
string_columns <- c(
  "Battery.Formula",
  "Working.Ion",
  "Formula.Charge",
  "Formula.Discharge"
)
columns <- names(data)
numeric_columns <- setdiff(columns, c(string_columns, "Battery.ID"))

4 Podstawowe statystyki

# Number of rows (records) in the loaded data set.
nrow(data)
## [1] 4351
# Per-column summary of the data set: length/class/mode for character
# columns and min, quartiles, mean and max for numeric columns.
summary(data)
##   Battery.ID        Battery.Formula    Working.Ion        Formula.Charge    
##  Length:4351        Length:4351        Length:4351        Length:4351       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Formula.Discharge  Max.Delta.Volume    Average.Voltage  Gravimetric.Capacity
##  Length:4351        Min.   :  0.00002   Min.   :-7.755   Min.   :   5.176    
##  Class :character   1st Qu.:  0.01747   1st Qu.: 2.226   1st Qu.:  88.108    
##  Mode  :character   Median :  0.04203   Median : 3.301   Median : 130.691    
##                     Mean   :  0.37531   Mean   : 3.083   Mean   : 158.291    
##                     3rd Qu.:  0.08595   3rd Qu.: 4.019   3rd Qu.: 187.600    
##                     Max.   :293.19322   Max.   :54.569   Max.   :2557.627    
##  Volumetric.Capacity Gravimetric.Energy Volumetric.Energy
##  Min.   :  24.08     Min.   :-583.5     Min.   :-2208.1  
##  1st Qu.: 311.62     1st Qu.: 211.7     1st Qu.:  821.6  
##  Median : 507.03     Median : 401.8     Median : 1463.8  
##  Mean   : 610.62     Mean   : 444.1     Mean   : 1664.0  
##  3rd Qu.: 722.75     3rd Qu.: 614.4     3rd Qu.: 2252.3  
##  Max.   :7619.19     Max.   :5926.9     Max.   :18305.9  
##  Atomic.Fraction.Charge Atomic.Fraction.Discharge Stability.Charge 
##  Min.   :0.00000        Min.   :0.007407          Min.   :0.00000  
##  1st Qu.:0.00000        1st Qu.:0.086957          1st Qu.:0.03301  
##  Median :0.00000        Median :0.142857          Median :0.07319  
##  Mean   :0.03986        Mean   :0.159077          Mean   :0.14257  
##  3rd Qu.:0.04762        3rd Qu.:0.200000          3rd Qu.:0.13160  
##  Max.   :0.90909        Max.   :0.993333          Max.   :6.48710  
##  Stability.Discharge     Steps       Max.Voltage.Step 
##  Min.   :0.00000     Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.01952     1st Qu.:1.000   1st Qu.: 0.0000  
##  Median :0.04878     Median :1.000   Median : 0.0000  
##  Mean   :0.12207     Mean   :1.167   Mean   : 0.1503  
##  3rd Qu.:0.09299     3rd Qu.:1.000   3rd Qu.: 0.0000  
##  Max.   :6.27781     Max.   :6.000   Max.   :26.9607

5 Analiza wartości atrybutów

5.1 Atrybuty tekstowe

# Count how often each distinct value occurs in every text attribute.
#
# For each column named in `string_columns` the loop builds a table with the
# columns `Value` (the distinct value), `Count` (number of occurrences, sorted
# in descending order) and `var` (name of the source column), and prints its
# ten most frequent values. All tables are combined into `counts`.
#
# The per-column tables are collected in a preallocated list and bound once at
# the end instead of growing a data frame with rbind() on every iteration.
counts_by_column <- vector("list", length(string_columns))
names(counts_by_column) <- string_columns

for (col in string_columns) {
  # `.data[[col]]` looks the column up by its string name inside the data
  # mask; naming the result `Value` gives the same column name for every
  # attribute (the original `get(col)` produced a column called "get(col)").
  col_counts <- data %>%
    count(Value = .data[[col]], name = "Count") %>%
    arrange(desc(Count))
  col_counts$var <- col
  counts_by_column[[col]] <- col_counts

  top_10_counts <- head(col_counts, 10)
  print(top_10_counts)
}

counts <- bind_rows(counts_by_column)
##             get(col) Count             var
## 1         Li0-1V2OF5    19 Battery.Formula
## 2         Li0-1CoPO4    18 Battery.Formula
## 3         Li0-1FePO4    18 Battery.Formula
## 4  Li0-3MnFeCo(PO4)3    17 Battery.Formula
## 5         Li0-1MnPO4    15 Battery.Formula
## 6        Li0-1V4OF11    15 Battery.Formula
## 7        Li0-1V4O5F7    12 Battery.Formula
## 8           Li0-1VF5    12 Battery.Formula
## 9        Li0-1CrP2O7    11 Battery.Formula
## 10       Li0-2MnP2O7    11 Battery.Formula
##    get(col) Count         var
## 1        Li  2440 Working.Ion
## 2        Ca   435 Working.Ion
## 3        Mg   423 Working.Ion
## 4        Zn   366 Working.Ion
## 5        Na   309 Working.Ion
## 6         K   107 Working.Ion
## 7        Al    95 Working.Ion
## 8         Y    93 Working.Ion
## 9        Rb    50 Working.Ion
## 10       Cs    33 Working.Ion
##    get(col) Count            var
## 1      MnO2    49 Formula.Charge
## 2      TiO2    47 Formula.Charge
## 3       VO2    46 Formula.Charge
## 4      CrO2    45 Formula.Charge
## 5      CoO2    43 Formula.Charge
## 6      NiO2    41 Formula.Charge
## 7      FeO2    36 Formula.Charge
## 8     FePO4    26 Formula.Charge
## 9       WO2    25 Formula.Charge
## 10    CoPO4    24 Formula.Charge
##           get(col) Count               var
## 1          LiCoPO4    19 Formula.Discharge
## 2          LiFePO4    19 Formula.Discharge
## 3          LiMnPO4    19 Formula.Discharge
## 4          LiV2OF5    19 Formula.Discharge
## 5     Li5Mn6(BO3)6    18 Formula.Discharge
## 6  Li3MnFeCo(PO4)3    17 Formula.Discharge
## 7         LiV4OF11    15 Formula.Discharge
## 8        Li2MnP2O7    14 Formula.Discharge
## 9        Li2FeSiO4    13 Formula.Discharge
## 10         LiCrPO4    12 Formula.Discharge
# Histogram of occurrence counts, drawn in a separate panel (with its own
# axis scales) for each text attribute stored in `var`.
occurrence_labels <- labs(
  title = "Liczba wystąpień wartości dla zmiennej",
  x = "Liczba wystąpień",
  y = "Liczba różnych wartości"
)

ggplot(data = counts, mapping = aes(x = Count)) +
  geom_histogram(binwidth = 1, fill = "green", alpha = 0.7) +
  facet_wrap(~var, scales = "free") +
  occurrence_labels +
  theme_minimal()

5.2 Atrybuty liczbowe

# Histograms of all numeric attributes, one panel per attribute.
#
# `drop = FALSE` keeps `numeric_df` a data frame even if `numeric_columns`
# holds a single name (plain `[, cols]` would collapse it to a vector and
# break pivot_longer()).
numeric_df <- data[, numeric_columns, drop = FALSE]

# Reshape to long form: one row per (attribute, value) pair, using the
# default pivot_longer() output columns `name` and `value`.
numeric_df_long <- numeric_df %>%
  pivot_longer(cols = everything()) %>%
  as.data.frame()

# `bins = 30` states the bin count explicitly (it is the ggplot2 default), so
# the plot is unchanged but stat_bin() no longer emits its
# "Pick better value" message. Panels use free scales, so a single shared
# `binwidth` would not suit every attribute.
ggplot(numeric_df_long, aes(x = value)) +
  geom_histogram(bins = 30, fill = "green", alpha = 0.7) +
  facet_wrap(~name, scales = "free") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

6 Korelacja

numeric_df <- data[, numeric_columns]

# Pairwise correlation between all numeric attributes (cor() defaults).
correlation_matrix <- cor(numeric_df)

# Long form of the matrix: one row per ordered pair of attributes, with the
# columns named x, y (attribute names) and cor (their correlation).
correlation_df <- setNames(
  as.data.frame(as.table(correlation_matrix)),
  c("x", "y", "cor")
)

# Keep each unordered pair once (and drop the diagonal) by retaining only the
# rows where the name in x sorts before the name in y.
x_sorts_before_y <-
  as.character(correlation_df$x) < as.character(correlation_df$y)
correlation_df_one_dir <- correlation_df[x_sorts_before_y, ]

Korelacja wszystkich par zmiennych numerycznych

# Print every attribute pair, strongest absolute correlation first.
rows_by_strength <- order(-abs(correlation_df_one_dir$cor))
print(correlation_df_one_dir[rows_by_strength, ])
##                             x                         y          cor
## 65         Gravimetric.Energy         Volumetric.Energy  0.928325316
## 39       Gravimetric.Capacity       Volumetric.Capacity  0.858416267
## 117          Stability.Charge       Stability.Discharge  0.802870095
## 32  Atomic.Fraction.Discharge      Gravimetric.Capacity  0.680771641
## 50            Average.Voltage        Gravimetric.Energy  0.665652274
## 44  Atomic.Fraction.Discharge       Volumetric.Capacity  0.618018605
## 91     Atomic.Fraction.Charge Atomic.Fraction.Discharge  0.597415747
## 62            Average.Voltage         Volumetric.Energy  0.554519056
## 132          Max.Voltage.Step                     Steps  0.535253881
## 3        Gravimetric.Capacity          Max.Delta.Volume  0.433773349
## 137        Gravimetric.Energy          Max.Voltage.Step  0.329232198
## 64        Volumetric.Capacity         Volumetric.Energy  0.325748151
## 125        Gravimetric.Energy                     Steps  0.294607452
## 8   Atomic.Fraction.Discharge          Max.Delta.Volume  0.290692092
## 72           Max.Voltage.Step         Volumetric.Energy  0.252662508
## 37           Max.Delta.Volume       Volumetric.Capacity  0.242476920
## 71                      Steps         Volumetric.Energy  0.238142024
## 63       Gravimetric.Capacity         Volumetric.Energy  0.230421553
## 51       Gravimetric.Capacity        Gravimetric.Energy  0.213246271
## 38            Average.Voltage       Volumetric.Capacity -0.212817820
## 41         Gravimetric.Energy       Volumetric.Capacity  0.209840583
## 69           Stability.Charge         Volumetric.Energy  0.178327117
## 20  Atomic.Fraction.Discharge           Average.Voltage -0.171690301
## 101        Gravimetric.Energy          Stability.Charge  0.166981905
## 98            Average.Voltage          Stability.Charge  0.166137144
## 128 Atomic.Fraction.Discharge                     Steps  0.164171296
## 67     Atomic.Fraction.Charge         Volumetric.Energy -0.147352294
## 26            Average.Voltage      Gravimetric.Capacity -0.146222183
## 123      Gravimetric.Capacity                     Steps  0.133397655
## 31     Atomic.Fraction.Charge      Gravimetric.Capacity  0.128921011
## 110           Average.Voltage       Stability.Discharge -0.128456834
## 134           Average.Voltage          Max.Voltage.Step  0.127120819
## 47                      Steps       Volumetric.Capacity  0.103705117
## 140 Atomic.Fraction.Discharge          Max.Voltage.Step  0.101979645
## 45           Stability.Charge       Volumetric.Capacity  0.101530488
## 55     Atomic.Fraction.Charge        Gravimetric.Energy -0.097292412
## 135      Gravimetric.Capacity          Max.Voltage.Step  0.095190645
## 108          Max.Voltage.Step          Stability.Charge  0.094046568
## 2             Average.Voltage          Max.Delta.Volume -0.082370729
## 113        Gravimetric.Energy       Stability.Discharge -0.078260883
## 56  Atomic.Fraction.Discharge        Gravimetric.Energy  0.064524784
## 99       Gravimetric.Capacity          Stability.Charge  0.063387053
## 130       Stability.Discharge                     Steps -0.063168645
## 122           Average.Voltage                     Steps  0.062785083
## 48           Max.Voltage.Step       Volumetric.Capacity  0.062608533
## 68  Atomic.Fraction.Discharge         Volumetric.Energy  0.061058647
## 5          Gravimetric.Energy          Max.Delta.Volume -0.060985786
## 70        Stability.Discharge         Volumetric.Energy -0.059994876
## 61           Max.Delta.Volume         Volumetric.Energy -0.058832142
## 115    Atomic.Fraction.Charge       Stability.Discharge -0.052397088
## 19     Atomic.Fraction.Charge           Average.Voltage -0.038555643
## 129          Stability.Charge                     Steps -0.037485986
## 97           Max.Delta.Volume          Stability.Charge  0.033758650
## 104 Atomic.Fraction.Discharge          Stability.Charge  0.032405104
## 46        Stability.Discharge       Volumetric.Capacity  0.031701213
## 127    Atomic.Fraction.Charge                     Steps  0.029736912
## 103    Atomic.Fraction.Charge          Stability.Charge -0.027357138
## 7      Atomic.Fraction.Charge          Max.Delta.Volume  0.021315304
## 120          Max.Voltage.Step       Stability.Discharge -0.016555176
## 116 Atomic.Fraction.Discharge       Stability.Discharge  0.014320385
## 121          Max.Delta.Volume                     Steps -0.013258214
## 111      Gravimetric.Capacity       Stability.Discharge  0.012538984
## 133          Max.Delta.Volume          Max.Voltage.Step -0.009925145
## 109          Max.Delta.Volume       Stability.Discharge  0.007735655
## 139    Atomic.Fraction.Charge          Max.Voltage.Step  0.005342003
## 43     Atomic.Fraction.Charge       Volumetric.Capacity  0.001245630
# Heat map of the absolute correlation between every pair of numeric
# attributes; `p` is converted to an interactive plot afterwards.
#
# The `text` aesthetic carries the tooltip string. It is declared in the
# plot-level mapping because ggplot2 warns about unknown aesthetics only for
# layer-level mappings, so this avoids the "Ignoring unknown aesthetics: text"
# warning.
p <- ggplot(
  correlation_df,
  aes(
    x = x,
    y = y,
    fill = abs(cor),
    text = paste("Korelacja pomiędzy", x, "i", y, "=", abs(cor))
  )
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "green") +
  labs(fill = "Korelacja") +
  # theme_minimal() is a complete theme and replaces earlier theme() settings,
  # so the axis-title tweak has to be added after it to take effect.
  theme_minimal() +
  theme(axis.title = element_blank())
## Warning in geom_tile(aes(x = x, y = y, fill = abs(cor), text = paste("Korelacja
## pomiędzy", : Ignoring unknown aesthetics: text
# Interactive version of the heat map: show only the custom tooltip text,
# rotate the x-axis tick labels by 45 degrees and blank both axis titles.
x_axis_options <- list(tickangle = 45, title = "")
y_axis_options <- list(title = "")

layout(
  ggplotly(p, tooltip = "text"),
  xaxis = x_axis_options,
  yaxis = y_axis_options
)

Przedstawienie zależności 5 par zmiennych o najwyższej korelacji

# The five attribute pairs with the largest absolute correlation.
strongest_first <- order(-abs(correlation_df_one_dir$cor))
top_5_correlation <- slice(correlation_df_one_dir[strongest_first, ], 1:5)

print(top_5_correlation)
##                           x                    y       cor
## 1        Gravimetric.Energy    Volumetric.Energy 0.9283253
## 2      Gravimetric.Capacity  Volumetric.Capacity 0.8584163
## 3          Stability.Charge  Stability.Discharge 0.8028701
## 4 Atomic.Fraction.Discharge Gravimetric.Capacity 0.6807716
## 5           Average.Voltage   Gravimetric.Energy 0.6656523
# Interactive scatter plot of Gravimetric.Energy against Volumetric.Energy
# with a linear-model trend line. The tooltip shows the battery ID and both
# plotted values.
ggplotly(
  ggplot(data, aes(x = Gravimetric.Energy, y = Volumetric.Energy)) +
    # x and y are inherited from the plot-level mapping; only the tooltip
    # text is layer-specific, so the trend line is not affected by it.
    geom_point(aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nGravimetric.Energy:", Gravimetric.Energy,
        # Colon added so this label matches the other tooltip labels.
        "\nVolumetric.Energy:", Volumetric.Energy
      )
    )) +
    geom_smooth(method = "lm") +
    labs(title = "Gravimetric.Energy i Volumetric.Energy") +
    theme_minimal(),
  tooltip = "text"
)
## Warning in geom_point(aes(x = Gravimetric.Energy, y = Volumetric.Energy, :
## Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'
# Interactive scatter plot of Gravimetric.Capacity against
# Volumetric.Capacity with a linear-model trend line; the tooltip shows the
# battery ID and both plotted values.
capacity_scatter <- ggplot(
  data,
  aes(x = Gravimetric.Capacity, y = Volumetric.Capacity)
) +
  geom_point(aes(
    text = paste(
      "ID baterii:", Battery.ID,
      "\nGravimetric.Capacity:", Gravimetric.Capacity,
      "\nVolumetric.Capacity:", Volumetric.Capacity
    )
  )) +
  geom_smooth(method = "lm") +
  labs(title = "Gravimetric.Capacity i Volumetric.Capacity") +
  theme_minimal()

ggplotly(capacity_scatter, tooltip = "text")
## Warning in geom_point(aes(x = Gravimetric.Capacity, y = Volumetric.Capacity, :
## Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'
# Interactive scatter plot of Stability.Charge against Stability.Discharge
# with a linear-model trend line; the tooltip shows the battery ID and both
# plotted values.
stability_scatter <- ggplot(
  data,
  aes(x = Stability.Charge, y = Stability.Discharge)
) +
  geom_point(aes(
    text = paste(
      "ID baterii:", Battery.ID,
      "\nStability.Charge:", Stability.Charge,
      "\nStability.Discharge:", Stability.Discharge
    )
  )) +
  geom_smooth(method = "lm") +
  labs(title = "Stability.Charge i Stability.Discharge") +
  theme_minimal()

ggplotly(stability_scatter, tooltip = "text")
## Warning in geom_point(aes(x = Stability.Charge, y = Stability.Discharge, :
## Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'
# Interactive scatter plot of Atomic.Fraction.Discharge against
# Gravimetric.Capacity with a linear-model trend line; the tooltip shows the
# battery ID and both plotted values.
fraction_capacity_scatter <- ggplot(
  data,
  aes(x = Atomic.Fraction.Discharge, y = Gravimetric.Capacity)
) +
  geom_point(aes(
    text = paste(
      "ID baterii:", Battery.ID,
      "\nAtomic.Fraction.Discharge:", Atomic.Fraction.Discharge,
      "\nGravimetric.Capacity:", Gravimetric.Capacity
    )
  )) +
  geom_smooth(method = "lm") +
  labs(title = "Atomic.Fraction.Discharge i Gravimetric.Capacity") +
  theme_minimal()

ggplotly(fraction_capacity_scatter, tooltip = "text")
## Warning in geom_point(aes(x = Atomic.Fraction.Discharge, y =
## Gravimetric.Capacity, : Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'
# Interactive scatter plot of Average.Voltage against Gravimetric.Energy
# with a linear-model trend line; the tooltip shows the battery ID and both
# plotted values.
voltage_energy_scatter <- ggplot(
  data,
  aes(x = Average.Voltage, y = Gravimetric.Energy)
) +
  geom_point(aes(
    text = paste(
      "ID baterii:", Battery.ID,
      "\nAverage.Voltage:", Average.Voltage,
      "\nGravimetric.Energy:", Gravimetric.Energy
    )
  )) +
  geom_smooth(method = "lm") +
  labs(title = "Average.Voltage i Gravimetric.Energy") +
  theme_minimal()

ggplotly(voltage_energy_scatter, tooltip = "text")
## Warning in geom_point(aes(x = Average.Voltage, y = Gravimetric.Energy, text =
## paste("ID baterii:", : Ignoring unknown aesthetics: text
## `geom_smooth()` using formula = 'y ~ x'

7 Najważniejsze trendy w badaniu

8 Predykcja dalszych cech